In [1]:
# All imports for this cell, declared once (stdlib first, then third-party).
import os

import bamboolib as bam
import numpy as np
import pandas as pd
import plotly.express as px

# Location of the input CSV. Set the SALARY_DATA_CSV environment variable to
# run the notebook on another machine; the fallback is the original path.
DATA_PATH = os.environ.get(
    'SALARY_DATA_CSV',
    'C:/Users/lenovo/Desktop/salary-data-cleaned.csv',
)

df = pd.read_csv(DATA_PATH)

# Step: Drop columns
# Discards the six 'Unnamed: N' columns; raises KeyError if any is missing.
df2 = df.drop(columns=[
    'Unnamed: 6',
    'Unnamed: 7',
    'Unnamed: 8',
    'Unnamed: 9',
    'Unnamed: 10',
    'Unnamed: 11',
])

# Step: Drop rows where Education Level is one of: phD
# The match is exact and case-sensitive, so rows labelled 'PhD' are kept.
# Rows with a missing Education Level are also kept (isin is False for NaN).
df3 = df2.loc[~(df2['Education Level'].isin(['phD']))]

# Histogram of Salary against Education Level, coloured by Gender. Rows missing
# either plotted column are excluded from the figure only (df3 is unchanged).
# NOTE(review): y is a categorical column, so the bars presumably aggregate
# Salary per Education Level rather than count rows -- confirm this is the
# intended chart.
fig = px.histogram(
    df3.dropna(subset=['Education Level', 'Salary']),
    x='Salary',
    y='Education Level',
    color='Gender',
)
fig
In [2]:
import plotly.express as px

# Box plot of Age for each Education Level; rows whose Education Level is
# missing are left out of the plotted data.
fig = px.box(
    data_frame=df3[df3['Education Level'].notna()],
    x='Education Level',
    y='Age',
)
fig
In [3]:
# Display the filtered frame; pandas abbreviates the repr to the first and
# last rows and reports the full shape underneath.
df3
      Age  Gender Education Level                    Job Title  \
0      21  Female     High School  Junior Sales Representative   
1      21  Female     High School  Junior Sales Representative   
2      21  Female     High School  Junior Sales Representative   
3      21  Female     High School  Junior Sales Representative   
4      21  Female     High School  Junior Sales Representative   
...   ...     ...             ...                          ...   
6697   62    Male             PhD    Software Engineer Manager   
6698   62    Male             PhD    Software Engineer Manager   
6699   62    Male             PhD    Software Engineer Manager   
6700   62    Male             PhD    Software Engineer Manager   
6701   62    Male             PhD    Software Engineer Manager   

      Years of Experience    Salary  
0                     0.0   25000.0  
1                     0.0   25000.0  
2                     0.0   25000.0  
3                     0.0   25000.0  
4                     0.0   25000.0  
...                   ...       ...  
6697                 19.0  200000.0  
6698                 20.0  200000.0  
6699                 19.0  200000.0  
6700                 20.0  200000.0  
6701                 19.0  200000.0  

[6701 rows x 6 columns]
In [ ]: